/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.db;
import java.io.*;
import java.util.*;
import net.nutch.io.*;
import net.nutch.pagedb.*;
import net.nutch.linkdb.*;
/**********************************************
* IWebDBReader is an interface to the consolidated
* page/link database. It permits all kinds of read-only operations.
*
* This database may be implemented in several different
* ways, which this interface hides from its user.
*
* @author Mike Cafarella
**********************************************/
public interface IWebDBReader {

  /**
   * Signals that the caller is finished reading and releases
   * its handle on the db.
   *
   * @throws IOException if an I/O error occurs
   */
  void close() throws IOException;

  /**
   * Looks up the Page stored under the given URL, if there is one.
   * Because Pages are unique by URL, at most one Page can match.
   * NOTE(review): how a missing Page is reported is not specified
   * here (presumably null) -- confirm against the implementations.
   *
   * @param url the URL of the wanted Page
   * @return the Page with that URL, if any
   * @throws IOException if an I/O error occurs
   */
  Page getPage(String url) throws IOException;

  /**
   * Looks up every Page whose content has the given MD5 checksum.
   * Several URLs frequently share one checksum, for example when
   * content has been copied, or when a site is reachable under
   * more than one URL.
   *
   * @param md5 the content checksum to search for
   * @return the Pages carrying that checksum
   * @throws IOException if an I/O error occurs
   */
  Page[] getPages(MD5Hash md5) throws IOException;

  /**
   * Tests whether the db contains a Page with the given MD5 checksum.
   *
   * @param md5 the content checksum to search for
   * @return true if a Page with that checksum is in the db
   * @throws IOException if an I/O error occurs
   */
  boolean pageExists(MD5Hash md5) throws IOException;

  /**
   * Enumerates every Page object in the db, ordered by URL.
   *
   * @return an Enumeration over all Pages, sorted by URL
   * @throws IOException if an I/O error occurs
   */
  Enumeration pages() throws IOException;

  /**
   * Enumerates every Page object in the db, ordered by MD5.
   *
   * @return an Enumeration over all Pages, sorted by MD5
   * @throws IOException if an I/O error occurs
   */
  Enumeration pagesByMD5() throws IOException;

  /**
   * Reports how many Page objects the db holds.
   *
   * @return the number of Pages in the db
   */
  long numPages();

  /**
   * Collects the Link objects that point to the given URL, i.e.
   * its incoming links. Beware: for a URL with a great many
   * incoming Links the returned array can be very large -- large
   * enough that calling this method will probably kill the process
   * for certain URLs.
   *
   * @param url the target URL whose incoming Links are wanted
   * @return the Links pointing to that URL
   * @throws IOException if an I/O error occurs
   */
  Link[] getLinks(UTF8 url) throws IOException;

  /**
   * Collects the Link objects that originate from a document with
   * the given MD5 checksum, i.e. the outlinks of the page of
   * content that the checksum describes.
   *
   * @param md5 the checksum of the source document
   * @return the Links originating from that document
   * @throws IOException if an I/O error occurs
   */
  Link[] getLinks(MD5Hash md5) throws IOException;

  /**
   * Enumerates every Link object in the db, ordered by target URL.
   *
   * @return an Enumeration over all Links, sorted by target URL
   * @throws IOException if an I/O error occurs
   */
  Enumeration links() throws IOException;

  /**
   * Reports how many Link objects the db holds.
   *
   * @return the number of Links in the db
   */
  long numLinks();
}